####################################################################################################
### Title: People Consistently View Elections and Civil Liberties as Key Components of Democracy ###
### Content: Additional analysis of the Indian sample                                            ###
### Date: August 24, 2024                                                                        ###
####################################################################################################

### Set-up ----
## Clean the working environment and set the working directory
rm(list = ls())
setwd("~/Desktop/Science_Replication/individual_country")

## Install the projoint package (if not yet installed)
# library(devtools)  # version 2.4.3
# install_github(repo = "yhoriuchi/projoint")

## Load the required packages
library(tidyverse) # version 2.0.0
library(estimatr)  # version 1.0.0
library(cregg)     # version 0.4.0
library(expss)     # version 0.11.4
library(projoint)  # version 0.4.1

## Read the cleaned dataset
df_cj <- read.csv("~/Desktop/Science_Replication/data_cleaning/df_IN.csv")

## Reorder the factors
# Electoral democracy
df_cj$election <- 
  factor(df_cj$election, 
         levels = c("Elections are not held", "Elections are biased", "Elections are free and fair"))

# Liberal democracy
df_cj$civil <- 
  factor(df_cj$civil, 
         levels = c("Civil liberties are not at all protected", "Civil liberties are weakly protected", "Civil liberties are strongly protected"))

# Institutional democracy
df_cj$leader <- 
  factor(df_cj$leader, 
         levels = c("Leader is weakly constrained", "Leader is somewhat constrained", "Leader is highly constrained"))

# Populist democracy
df_cj$populist <- 
  factor(df_cj$populist, 
         levels = c("Leader rarely follows the majority", "Leader sometimes follows the majority", "Leader frequently follows the majority"))

# Loyalist democracy
df_cj$obedient <- 
  factor(df_cj$obedient, 
         levels = c("Dissidents mostly challenge the gov't", "Dissidents occasionally obey the gov't", "Dissidents mostly obey the gov't"))

# Substantive democracy (economy)
df_cj$econ <- 
  factor(df_cj$econ, 
         levels = c("Economic equality is very low", "Economic equality is somewhat low", "Economic equality is high"))

# Substantive democracy (gender)
df_cj$gender <- 
  factor(df_cj$gender, 
         levels = c("Gender equality is very low", "Gender equality is somewhat low", "Gender equality is high"))

# Technocratic democracy
df_cj$expert <- 
  factor(df_cj$expert, 
         levels = c("Experts have small influence on policy", "Experts have some influence on policy", "Experts have much influence on policy"))

# Direct democracy
df_cj$direct <- 
  factor(df_cj$direct, 
         levels = c("Policies are rarely voted on", "Policies are sometimes voted on", "Policies are frequently voted on"))

## Format attribute labels in plots
df_cj <- apply_labels(df_cj,
                      election = "Electoral Democracy",
                      civil = "Liberal Democracy",
                      leader = "Institutional Democracy",
                      populist = "Populist Democracy",
                      obedient = "Loyalist Democracy",
                      econ = "Substantive Democracy - Economy",
                      gender = "Substantive Democracy - Gender",
                      expert = "Technocratic Democracy",
                      direct = "Direct Democracy")

attributes <- "election + civil + leader + populist + obedient + econ + gender + expert + direct"

## Bold feature labels in plots
bph <- c('bold', rep('plain', 3), 'bold', rep('plain', 3),
         'bold', rep('plain', 3), 'bold', rep('plain', 3), 
         'bold', rep('plain', 3), 'bold', rep('plain', 3),
         'bold', rep('plain', 3), 'bold', rep('plain', 3), 
         'bold', rep('plain', 3)) %>%
  rev() # reverse coding

## Create a function that indicates which estimates are statistically significant
## at the 5% level after using the BH procedure to adjust for multiple comparisons
sig1_fun <- function(data){
  data <- data %>% 
    mutate(sig = case_when(
      (p.adjust(p, method = "BH") < 0.05) == TRUE ~ 1, 
      (p.adjust(p, method = "BH") < 0.05) == FALSE ~ 0)) %>% 
    mutate(sig = factor(sig, levels = c(0, 1)))
  return(data)
}

### Figure S16: difference in marginal means by subgroup ----
## By age
df_cj <- df_cj %>% mutate(age_bin = case_when(
  age < 40 ~ 1,
  age > 40 ~ 0
))
df_cj$age_bin <- factor(df_cj$age_bin, 0:1, c("Older", "Younger Than 40"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~age_bin)
diff_mms_age <- sig1_fun(diff_mms)

## By gender
df_cj$gender_bin <- factor(df_cj$gender_bin, levels = c("Male", "Female"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~gender_bin)
diff_mms_gender <- sig1_fun(diff_mms)

## By education
df_cj$edu_bin <- factor(df_cj$edu_bin, levels = c("No College", "College"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~edu_bin)
diff_mms_edu <- sig1_fun(diff_mms)

## By minority status
df_cj$minority_bin <- factor(df_cj$minority_bin, levels = c("Non-Minority", "Minority"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~minority_bin)
diff_mms_minority <- sig1_fun(diff_mms)

## By self-reported political ideology
df_cj$ideo_bin <- factor(df_cj$ideo_bin, levels = c("Left", "Right"), labels = c("Leftwing", "Rightwing"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~ideo_bin)
diff_mms_ideo <- sig1_fun(diff_mms)

## By geopolitical alignment
df_cj$pro_china <- factor(df_cj$pro_china, levels = c("Pro-US", "Pro-China"))
diff_mms <- cj(data = df_cj, 
               formula = as.formula(paste("selected ~", attributes)),
               id = ~id,
               estimate = "mm_diff",
               by = ~pro_china)
diff_mms_china <- sig1_fun(diff_mms)

## Combine the difference-in-MMs plots
combined <- bind_rows(diff_mms_age, diff_mms_gender, diff_mms_edu,
                      diff_mms_minority, diff_mms_ideo, diff_mms_china)
combined$BY <- 
  factor(combined$BY,
         levels = c("Younger Than 40 - Older", "Female - Male", "College - No College",
                    "Minority - Non-Minority", "Rightwing - Leftwing", "Pro-China - Pro-US"))
p <- plot(combined, legend_pos = "none", size = 1) + 
  ggplot2::facet_wrap(~BY, ncol = 3) +
  aes(color = sig) +
  xlab("Difference in Marginal Means of Pr(Being Selected as More Democratic)")
p <- p + scale_color_manual(values = c("grey60", "black"))
p <- p + theme_bw(base_size = 8, base_family = "Times") %+replace%
  theme(
    axis.text.x = element_text(size = 6, colour = "black",  hjust = .5, vjust = 1),
    axis.text.y = element_text(size = 8, colour = "black", hjust = 1, vjust = .5, face = bph),
    axis.ticks = element_line(colour = "grey50"),
    axis.title.y = element_text(size = 10, angle = 90, vjust = .01, hjust = .1),
    legend.position = "none",
    strip.text.x = element_text(size = 8)
  )
p <- p + coord_cartesian(xlim = c(-0.2, 0.2))
ggsave("Figure S16.pdf", width = 6, height = 8)

### Apply projoint ----
## Read the raw dataset
df <- cjdata::read_Qualtrics("~/Desktop/Science_Replication/data_cleaning/raw_IN.csv")
df$Q1.1 <- ifelse(df$Q1.1 == "देश ए", "Country A", "Country B")
df$Q1.5 <- ifelse(df$Q1.5 == "देश ए", "Country A", "Country B")
df$Q1.9 <- ifelse(df$Q1.9 == "देश ए", "Country A", "Country B")

## Reshape the data
df_pj <- reshape_projoint(.dataframe = df, 
                          .outcomes = c("Q1.1", "Q1.5", "Q1.9"),
                          .outcomes_ids = c("A", "B"),
                          .alphabet = "F", 
                          .idvar = "ResponseId", 
                          .repeated = FALSE)

## Arrange the order and labels of attributes and levels
save_labels(df_pj, "labels_original_IN.csv")
df_pj <- read_labels(df_pj, "labels_arranged_IN.csv") # reordered and relabeled manually from labels_original_IN.csv
df_pj@data$att8 <- ifelse(df_pj@data$att8 == "att8:level4", "att8:level3", df_pj@data$att8)

## Predict IRR based on the extrapolation method
predicted_irr <- predict_tau(df_pj)

### Figure S6: estimate MMs at the profile level using the predicted IRR ----
mm <- projoint(.data = df_pj, .irr = predicted_irr@irr$predicted[1])
mm@estimates <- mm@estimates[complete.cases(mm@estimates), ]
plot(mm, .estimates = "both") + 
  scale_color_manual(values = c("black", "grey80")) +
  theme(text = element_text(color = "black", size = 12, family = "Times"),
        axis.text = element_text(color = "black", family = "Times", size = 11)) +
  coord_cartesian(xlim = c(0.1, 0.9))
ggsave("Figure S6.pdf", width = 12, height = 8)
